In [1]:
import sys
In [2]:
# Report the interpreter that is executing this notebook.
print("Following are your python version details:\n%s" % (sys.version,))
In [3]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [4]:
# Global seaborn look for every figure below: large "poster" scaling
# and the "ticks" axes style.
sns.set_context(context="poster")
sns.set_style(style="ticks")
In [5]:
# Record the versions of the core libraries used by this notebook.
# print(...) with %-formatting runs on both Python 2 and Python 3 and matches
# the style of the interpreter-version cell; the bare `print x, y` statement
# form is a SyntaxError on Python 3.
print("Numpy version:  %s" % np.__version__)
print("Pandas version:  %s" % pd.__version__)
print("Matplotlib version:  %s" % plt.matplotlib.__version__)
print("Seaborn version:  %s" % sns.__version__)
In [6]:
# Sample the parabola y = x**2 from -10 up to (but excluding) 10 in steps of 0.14.
x = np.arange(-10, 10, 0.14)
y = x**2
# print(...) with %-formatting runs on both Python 2 and Python 3; the shape
# tuple is wrapped in a 1-tuple so it is formatted as a single value.
print("x.shape:  %s" % (x.shape,))
print("y.shape:  %s" % (y.shape,))
Plotting with matplotlib. More details at: http://matplotlib.org/users/pyplot_tutorial.html
In [7]:
# Draw the sampled parabola on the current axes as a red line with circle
# markers, then label the axes, title the figure and show the legend.
ax = plt.gca()
ax.plot(x, y, color="r", marker="o", label="demo")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.set_title("Demo plot")
ax.legend()
Out[7]:
Working with tabular data in pandas. More details at: http://pandas.pydata.org/pandas-docs/stable/tutorials.html
In [8]:
# Assemble the demo frame: the sampled curve (X, Y) plus two random integer
# columns, G and E, that later cells use for grouping and as class labels.
# A seeded generator keeps G and E -- and therefore every figure and model fit
# derived from them -- identical across Restart & Run All.
rng = np.random.RandomState(42)
df = pd.DataFrame(
    {
        "X": x,
        "Y": y,
        "G": rng.randint(1, 10, size=x.shape),  # integers 1..9 (upper bound is exclusive)
        "E": rng.randint(1, 5, size=x.shape),  # integers 1..4
    },
    columns=["X", "Y", "G", "E"],  # pin the column order regardless of dict ordering
)
df.shape
Out[8]:
In [9]:
# Peek at the first five rows of the frame.
df.head(n=5)
Out[9]:
In [10]:
# Summary statistics of the frame; G and E are still integer columns here
# (they are cast to categories in the following cell).
df.describe()
Out[10]:
In [11]:
# G and E are labels rather than quantities: store them as pandas categories.
for label_column in ("G", "E"):
    df[label_column] = df[label_column].astype("category")
Statistical plots with seaborn. More details at: https://seaborn.pydata.org/ (previously hosted at https://stanford.edu/~mwaskom/software/seaborn/index.html)
In [12]:
# Bar chart of the mean of Y within each G group.
sns.barplot(
    data=df,
    x="G",
    y="Y",
    estimator=np.mean,
    color="dodgerblue",
)
Out[12]:
In [13]:
# Joint view of X against Y with a regression fit (kind="reg").
# x and y are passed by keyword and the figure size via `height`: seaborn 0.9
# renamed `size` to `height`, and newer releases require x/y as keywords.
g = sns.jointplot(
    x="X",
    y="Y",
    data=df,
    kind="reg",
    color="r",
    height=7,
)
In [14]:
# Pairwise relationships between the columns of df, coloured by class E.
sns.pairplot(data=df, hue="E")
Out[14]:
In [15]:
# One panel per value of G, wrapped four panels per row, with series coloured by E.
# `height` is the per-panel size; it replaces the `size` argument that seaborn 0.9 renamed.
grid = sns.FacetGrid(df, col="G", hue="E", col_wrap=4, height=3, legend_out=True)
# Dotted grey horizontal reference line at Y = 30 in every panel.
grid.map(plt.axhline, y=30, ls=":", c=".5")
# Plot Y against X for each subset, then add one shared legend for the E values.
# The result is bound to `t` so the cell does not echo the grid's repr.
t = grid.map(plt.plot, "X", "Y", marker="o", ms=4).add_legend(title="E values")
Model fitting with scikit-learn. More details at: http://scikit-learn.org/stable/index.html
In [16]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import classification_report
In [17]:
# Regression inputs: a one-column design matrix (X) and the target (Y).
# Both are copies, so columns added to X later do not modify df.
X = df[["X"]].copy()
y = df["Y"].copy()
# print(...) with %-formatting runs on both Python 2 and Python 3.
print("X.shape:  %s" % (X.shape,))
print("Y.shape:  %s" % (y.shape,))
In [18]:
# Fit a linear regression of Y on the single feature X.
model_linear = LinearRegression()
model_linear.fit(X=X, y=y)
Out[18]:
In [19]:
# In-sample predictions of the linear-only model.
y_pred = model_linear.predict(X)
# print(...) with %-formatting runs on both Python 2 and Python 3.
print("Y_pred.shape:  %s" % (y_pred.shape,))
In [20]:
# Add the squared term as a second feature so a linear model can fit the curve.
X["X^2"] = X["X"].pow(2)
In [21]:
# Check the design matrix columns after adding the squared feature.
X.columns
Out[21]:
In [22]:
# Refit on the design matrix that now includes the X^2 column and compute
# in-sample predictions.
model_sqr = LinearRegression()
model_sqr.fit(X, y)
y_pred_sqr = model_sqr.predict(X)
# print(...) with %-formatting runs on both Python 2 and Python 3.
print("Y_pred_sqr.shape:  %s" % (y_pred_sqr.shape,))
In [23]:
# Observed data as translucent points.
plt.scatter(X["X"], y, s=30, alpha=0.5, marker="o", label="data")
# Overlay both fitted curves as thin dashed lines: black for the linear-only
# model, red for the model with the squared term.
for fitted_values, line_color, fit_label in (
    (y_pred, "k", "fit [linear]"),
    (y_pred_sqr, "r", "fit [square]"),
):
    plt.plot(
        X["X"],
        fitted_values,
        color=line_color,
        linestyle="--",
        linewidth=1.5,
        label=fit_label,
    )
plt.xlabel("X")
plt.ylabel("Y")
plt.legend()
Out[23]:
In [24]:
# Fitted coefficient(s) of the model trained on X alone.
model_linear.coef_
Out[24]:
In [25]:
# Fitted coefficients of the model trained on X and X^2.
model_sqr.coef_
Out[25]:
Statistical models with statsmodels. More details at: https://www.statsmodels.org/ (previously hosted at http://statsmodels.sourceforge.net/)
In [26]:
import statsmodels.api as sm
In [27]:
# Ordinary least squares of Y on X and X^2 through the array interface.
# sm.OLS does not add an intercept by itself, so the constant column is added
# explicitly; this makes the fit comparable to sklearn's LinearRegression
# (which fits an intercept by default) and to the formula-based OLS model.
model = sm.OLS(y, sm.add_constant(X))
res = model.fit()
res.summary2()
Out[27]:
In [28]:
# Same quadratic regression expressed as a formula on df; I(...) makes
# X**2 be evaluated as arithmetic rather than as formula syntax.
model = sm.OLS.from_formula(formula="Y ~ X + I(X**2)", data=df)
res = model.fit()
res.summary2()
Out[28]:
In [29]:
# Classification inputs: features X and Y, target class E.
X = df.loc[:, ["X", "Y"]]
y = df.loc[:, "E"]
In [30]:
# Multinomial logistic regression of the class label on the features,
# scored on the same data it was trained on.
model = LogisticRegression(multi_class="multinomial", solver="lbfgs")
model.fit(X, y)
y_pred = model.predict(X)
# print(...) runs on both Python 2 and Python 3; the bare print statement
# is a SyntaxError on Python 3.
print(classification_report(y, y_pred))
In [31]:
# Per-class predicted probabilities for every row of the feature matrix.
y_pred_p = model.predict_proba(X=X)
In [32]:
# Show the predicted class probabilities for the first ten rows.
y_pred_p[:10]
Out[32]:
In [33]:
# Multinomial logit of class E on Y and X via the statsmodels formula interface.
model = sm.MNLogit.from_formula(formula="E ~ Y + X", data=df)
res = model.fit()
# The fit summary is displayed by the following cell.
In [34]:
# Display the summary of the most recent statsmodels fit held in `res`.
res.summary()
Out[34]:
In [ ]: